###Panel packages 

##Install and Load Packages
## Packages used by this script. Anything not yet installed is installed
## first, then every package is attached.
list.of.packages <- c("AER", "sandwich", "lmtest", "car", "dplyr", "stargazer", "ggplot2", "foreign",
                      "openintro","OIdata", "gdata", "doBy","ivpack", "psych","plm", "readxl", "robustHD")
new.packages <- setdiff(list.of.packages, rownames(installed.packages()))
if (length(new.packages) > 0) install.packages(new.packages)
## require() returns FALSE instead of stopping when a package cannot be
## attached, so the results are collected and failures are reported once,
## by name, rather than surfacing later as "could not find function".
packages.loaded <- vapply(list.of.packages, require, logical(1),
                          character.only = TRUE)
if (!all(packages.loaded)) {
  warning("Packages that could not be loaded: ",
          paste(list.of.packages[!packages.loaded], collapse = ", "),
          call. = FALSE)
}
##

##Path to database
# Working directory is pinned to the root of drive D:.
setwd(dir = "d:/")

# The workbook is chosen interactively; only the "basefinal" sheet is read.
Base_de_dados_R <- read_excel(path = file.choose(), sheet = "basefinal")

# Expose the columns on the search path so they can be used by bare name.
# NOTE(review): attach() works on a copy, so later edits to Base_de_dados_R
# are not visible through the bare names - confirm this is still wanted.
attach(what = Base_de_dados_R)

#to check outliers
#boxplot(Base_de_dados_R)


###### descriptive statistics

#datadescstat<-as.data.frame(Base_de_dados_R[c("ROA", "ROI", "ROE", "ENDM",	"LIQCOR",	"TAM",	"RENT",	"CRESESP",	"CRESVEN",	"QTOBIN",	"SHARPE")])
#datacolnames<-c("ROA", "ROI", "ROE", "ENDM", "LIQCOR",	"TAM",	"RENT",	"CRESESP",	"CRESVEN",	"QTOBIN",	"SHARPE")

#stargazer(datadescstat, title="Descriptive Statistics", digits=3,
#          covariate.labels=datacolnames)

#list(stargazer)
#correlation.matrix <- cor(datadescstat,method = "pearson", use = "complete.obs")
#correlation.matrix
#stargazer(correlation.matrix, title="Correlation Matrix",digits=3)

#xtset<-c("Empresa","Ano")
#fit <- lm(DP_LL ~ Final_Bovinos	+ Final_Aves	+ Final_Suinos +	Final_Floresta +	Final_acucar +	Final_algodao +	Final_Milho +	Final_Soja + Final_Trigo + LN_M_AT + M_ABT + M_END, data=Base_de_dados_R)
#summary(fit)
#print(fit)
# show results

#which(!is.na(

###### Regression

## Linear model of DP_LL on the Final_* columns plus LN_M_AT, M_ABT and M_END.
## Columns are resolved through `data =` instead of `Base_de_dados_R$...`
## terms, so coefficient names stay short and predict()/update() can be
## used with other data.
regression <- lm(
  DP_LL ~ Final_Bovinos + Final_Aves + Final_Suinos + Final_Floresta +
    Final_acucar + Final_algodao + Final_Milho + Final_Soja + Final_Trigo +
    LN_M_AT + M_ABT + M_END,
  data = Base_de_dados_R
)
## Standard lm diagnostic plots.
plot(regression)
## The former xyplot(regression) call was removed: xyplot() is a lattice
## function, lattice is not among the packages this script loads explicitly,
## and lattice provides no xyplot() method for "lm" objects.
summary(regression)
print(regression)


## Panel index columns and the data used by the plm() fits below. They are
## defined here because the models are estimated before the assignments of
## the same names further down the script would be reached.
xtset <- c("Empresa", "Ano")
dataset <- Base_de_dados_R

## Model specification with bare column names, resolved through `data =`
## (the original mixed `Base_de_dados_R$...` terms with one bare name).
formula <- DP_LL ~ Final_Bovinos + Final_Aves + Final_Suinos +
  Final_Floresta + Final_acucar + Final_algodao + Final_Milho +
  Final_Soja + Final_Trigo + LN_M_AT + M_ABT + M_END
group <- Base_de_dados_R$Empresa

## Same specification estimated as pooled, within and random-effects models.
pooled <- plm(formula, data = dataset, index = xtset, model = "pooling")
summary(pooled)
fixed <- plm(formula, data = dataset, index = xtset, model = "within")
summary(fixed)
random <- plm(formula, data = dataset, index = xtset, model = "random")
summary(random)









#Base_de_dados_R$DP_LL
#summary(Base_de_dados_R$DP_LL)
#boxplot(Base_de_dados_R$DP_LL)
#length(Base_de_dados_R$DP_LL)

#WDP_LL <- winsor(Base_de_dados_R$DP_LL, trim = 0.10, na.rm = TRUE)
#summary(WDP_LL)
#boxplot(WDP_LL)
#WDP_LL

#Base_de_dados_R$M_ABT
#summary(Base_de_dados_R$M_ABT)
#boxplot(Base_de_dados_R$M_ABT)
#length(Base_de_dados_R$M_ABT)

#WM_ABT <- winsor(Base_de_dados_R$M_ABT, trim = 0.15, na.rm = TRUE)
#summary(WM_ABT)
#boxplot(WM_ABT)
#WM_ABT
#XXX <- winsorize(Base_de_dados_R$M_ABT)
#summary(XXX)

#Base_de_dados_R$M_END
#summary(Base_de_dados_R$M_END)
#boxplot(Base_de_dados_R$M_END)
#length(Base_de_dados_R$M_END)

#########
## Toy sample: 40 draws from 1..20 plus three planted extreme values.
data <- c(sample(x = 1:20, size = 40, replace = TRUE), 65, 80, -150)
data
length(data)
summary(data)
boxplot(data)

## Clamp the sample to the fixed limits [-5, 50].
## The original call passed minval/maxval/probs to winsorize(); those are not
## arguments of robustHD's winsorize() (they match DescTools' Winsorize()),
## and `probs = c(0,5, 0.95)` contained a decimal-comma typo. With both
## limits given explicitly the quantiles are not needed, so the clamp is
## done directly in base R.
dataw <- pmin(pmax(data, -5), 50)
length(dataw)
summary(dataw)
boxplot(dataw)

## generate data
set.seed(1234)  # fixed seed so the draw is repeatable
x <- rnorm(n = 10, mean = 0, sd = 1)
x[1] <- 10 * x[1]  # inflate the first value so it acts as an outlier

## show the raw values, then summarise them before and after winsorize()
x
summary(x)
xx <- winsorize(x)
summary(xx)
length(x)



#########
## Percentiles (0%, 1%, ..., 100%) of DP_LL. Missing values are dropped, as
## in the other DP_LL summaries of this script, so an NA does not abort the
## call.
quantile(Base_de_dados_R$DP_LL, probs = seq(0, 1, 0.01), na.rm = TRUE,
         names = TRUE, type = 7)
## The original `quantile(scores)` referenced an object that is never
## defined in this script; the default quantiles of DP_LL are shown instead.
quantile(Base_de_dados_R$DP_LL, na.rm = TRUE)


## Attempt 1: winsor() (psych) with trim = 0.10, ignoring missing values.
WDP_LL <- winsor(Base_de_dados_R$DP_LL, trim = 0.10, na.rm = TRUE)
summary(WDP_LL)
boxplot(WDP_LL)
WDP_LL

## Attempt 2: clamp DP_LL to its 5% / 95% quantiles (quantile type 7).
## The original call passed minval/maxval/probs/na.rm/type to winsorize();
## robustHD's winsorize() does not define those arguments (they match
## DescTools' Winsorize()), so the clamp is done in base R. NAs are ignored
## when computing the limits and stay NA in the result.
WDP_LL <- local({
  limits <- quantile(Base_de_dados_R$DP_LL, probs = c(0.05, 0.95),
                     na.rm = TRUE, names = FALSE, type = 7)
  pmin(pmax(Base_de_dados_R$DP_LL, limits[1]), limits[2])
})
summary(WDP_LL)
boxplot(WDP_LL)
WDP_LL

## Winsorizing rule (was written as bare text, which does not parse):
##   wins(x) = -c if x < -c;  c if x > c;  x otherwise
## NOTE(review): c presumably corresponds to `const` below, applied on the
## scale obtained from centerFun/scaleFun - confirm against robustHD docs.
WDP_LL <- winsorize(Base_de_dados_R$DP_LL, standardized = FALSE, centerFun = median,
          scaleFun = mad, const = 2, return = "data")
## Fixes: the call was missing its closing parenthesis, and `return` must be
## a single choice ("data" or "weights"); "0.01" is not one of them.

## Winsorize with standardized = TRUE, returning the cleaned data.
## Fixes: `return = C(Base_de_dados_R$DP_LL,1)` is not a valid choice
## ("data" or "weights"), and `prob` / `na.rm` are not arguments of
## robustHD's winsorize(), so they were removed.
## NOTE(review): per the robustHD documentation, standardized = TRUE means
## the input is already robustly standardized; DP_LL is passed here as-is -
## confirm this is intended.
WDP_LL <- winsorize(Base_de_dados_R$DP_LL, standardized = TRUE, return = "data")
summary(WDP_LL)
boxplot(WDP_LL)
WDP_LL

## Same call with the centre/scale functions spelled out. `return` was
## c("0.05", "0.95"), which is not a valid choice; "data" is used instead.
WDP_LL <- winsorize(Base_de_dados_R$DP_LL, standardized = TRUE, centerFun = median,
          scaleFun = mad, return = "data")
summary(WDP_LL)
boxplot(WDP_LL)
WDP_LL

## winsorize() with standardized = TRUE and all other settings at defaults.
WDP_LL <- winsorize(Base_de_dados_R$DP_LL, standardized = TRUE)
summary(WDP_LL)
WDP_LL

## Clamp DP_LL to its 10% / 90% quantiles. The original passed
## minval/maxval/probs/na.rm to winsorize(), which robustHD's winsorize()
## does not define, so the clamp is done in base R. NAs are ignored when
## computing the limits and stay NA in the result.
WDP_LL <- local({
  limits <- quantile(Base_de_dados_R$DP_LL, probs = c(0.10, 0.90),
                     na.rm = TRUE, names = FALSE)
  pmin(pmax(Base_de_dados_R$DP_LL, limits[1]), limits[2])
})
summary(WDP_LL)

## winsorize() with every setting at its default.
WDP_LL <- winsorize(Base_de_dados_R$DP_LL)
summary(WDP_LL)

## The original `winsorize(..., prob = 0,5)` used a decimal comma, which
## splits into `prob = 0` plus a stray positional 5; winsorize() has no
## `prob` argument either.
## NOTE(review): the intended probability is unclear, so only the arguments
## the function actually has are kept - confirm what was meant.
WDP_LL <- winsorize(Base_de_dados_R$DP_LL)
summary(WDP_LL)

## Clamp DP_LL to its 5% / 95% quantiles, ignoring NAs for the limits.
## Replaces `Winsorize(..., na.rm=T)`: Winsorize() (capital W) is not
## provided by any package this script loads explicitly, and T is a
## reassignable alias of TRUE.
WDP_LL <- local({
  limits <- quantile(Base_de_dados_R$DP_LL, probs = c(0.05, 0.95),
                     na.rm = TRUE, names = FALSE)
  pmin(pmax(Base_de_dados_R$DP_LL, limits[1]), limits[2])
})

## Winsorize a numeric vector at two of its own quantiles.
## A helper function for wins.df (wins.df is not defined in this file).
##
## x:     a numeric vector; NAs are ignored when computing the limits and
##        are returned unchanged.
## probs: lower and upper probabilities of the clipping quantiles; the
##        default reproduces the 1st / 99th percentile rule.
## Returns a vector of the same length as x in which values below the lower
## quantile are raised to it and values above the upper quantile are lowered
## to it.
##
## Fixes over the previous version: the parameter was written as
## `Base_de_dados_R$DP_LL` (not a valid argument name), the opening brace
## was missing, and the second ifelse() restarted from the raw data (via the
## non-existent `Base_de_dados_R$DP_LL.w`), discarding the lower clipping.
wins <- function(x, probs = c(0.01, 0.99)) {
  stopifnot(is.numeric(x), is.numeric(probs), length(probs) == 2)
  limits <- quantile(x, probs = probs, na.rm = TRUE, names = FALSE)
  # pmax() applies the lower bound, pmin() the upper; both keep NAs in x.
  pmin(pmax(x, limits[1]), limits[2])
}

# Print the result of winsorize() on DP_LL with default settings; the result
# is not stored.
winsorize(Base_de_dados_R$DP_LL)
# Summarises WDP_LL as last assigned earlier in the script, not the value
# printed by the call above.
summary(WDP_LL)

# Print the result of winsor() on DP_LL with default settings; again not
# stored, so the summary below is of the unmodified DP_LL column.
winsor(Base_de_dados_R$DP_LL)
summary(Base_de_dados_R$DP_LL)

# Print the raw DP_LL column.
Base_de_dados_R$DP_LL


#setting the bench mark
# Upper cap for DP_LL: a fixed base value plus 1.5 times the interquartile
# range. na.rm = TRUE keeps IQR() from failing when DP_LL has missing values.
# NOTE(review): 208104 is presumably the third quartile of DP_LL taken from
# an earlier summary() - confirm, or compute it from the data.
bench <- 208104 + 1.5 * IQR(Base_de_dados_R$DP_LL, na.rm = TRUE)
bench

#Winsorizing
# Show the values above the cap (which() skips NA comparisons).
Base_de_dados_R$DP_LL[which(Base_de_dados_R$DP_LL > bench)]

# Replace every value above the cap by the cap itself. Only the upper tail
# is capped here; low values and NAs are left untouched.
Base_de_dados_R$DP_LL[which(Base_de_dados_R$DP_LL > bench)] <- bench

summary(Base_de_dados_R$DP_LL)
length(Base_de_dados_R$DP_LL)
boxplot(Base_de_dados_R$DP_LL)
## Panel data setup: the data frame used for estimation and the names of
## its two index columns. Both are consumed by the plm() calls in this script.
dataset <- Base_de_dados_R

xtset <- c("Empresa", "Ano")
